This notebook analyzes gene expression in each cluster across multiple brain regions (SN, PUT, PFC, AMY) in the ASAP-PMDBS snRNA-seq dataset. The workflow focuses on: 1. Data preparation: loading gene-level counts from trusTEr output, organizing by cluster and region. 2. PD vs Control analysis: testing differential expression between PD and control within each cluster and region. 3. PD vs Control GSEA: testing enrichment of GO terms based on DEA (step 2) of each cluster and region. 4. Functional focus: visualizing interferon (IFN)-related signatures across clusters. 5. Visualization: generating mean-difference plots, and violin/boxplots.
Load all required libraries and custom DEA functions
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(pheatmap)
library(clusterProfiler)
##
## clusterProfiler v4.12.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:purrr':
##
## simplify
##
## The following object is masked from 'package:IRanges':
##
## slice
##
## The following object is masked from 'package:S4Vectors':
##
## rename
##
## The following object is masked from 'package:stats':
##
## filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:clusterProfiler':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
library(EnhancedVolcano)
## Loading required package: ggrepel
source("TE_DEA_functions.R")
Gather featureCount outputs, extract region and cluster IDs, merge
them into cluster-specific count matrices and save them for convenience
as .csv
path <- "~/inbox/ASAP/trusTEr_output_vs2"
files <- list.files(path, recursive = T, full.names = T)
files <- files[which(grepl("gene_counts/unique/", files))]
files <- files[which(!endsWith(files, "summary"))]
files <- data.frame(files = files)
files$region <- sapply(str_split(files$files, "/"), `[[`, 7)
files$cluster <- sapply(str_split(files$files, "_uniqueMap"), `[[`, 1)
files$cluster <- sapply(str_split(files$cluster, "/"), tail, 1)
files$cluster_num <- sapply(str_split(files$cluster, "_"), tail, 1)
files$region_cluster <- paste(files$region, files$cluster_num, sep="_")
files_split <- split(files, f = c(files$region_cluster))
regions <- unique(files$region)
for(cluster in as.character(0:7)){
for(region in regions){
region_cluster <- paste(region, cluster, sep="_")
files_region_cluster <- files_split[[region_cluster]]
for(f in 1:length(files_region_cluster$files)){
file <- files_region_cluster$files[f]
if(f == 1){
gene_counts <- fread(file, data.table = F)
colnames(gene_counts)[ncol(gene_counts)] <- str_remove_all(sapply(str_split(colnames(gene_counts)[ncol(gene_counts)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
}else{
tmp <- fread(file, data.table = F)
colnames(tmp)[ncol(tmp)] <- str_remove_all(sapply(str_split(colnames(tmp)[ncol(tmp)], "/"), tail, 1), "_Aligned.sortedByCoord.out.bam")
if(all(tmp$Geneid == gene_counts$Geneid)){
gene_counts <- cbind(gene_counts, tmp[,ncol(tmp),drop=F])
}else{
gene_counts <- merge(gene_counts, tmp[,c("Geneid", colnames(tmp)[ncol(tmp)])], by="Geneid")
}
}
}
write.csv(gene_counts, paste("~/inbox/ASAP/trusTEr_output_vs2/", region, "/gene_counts_", paste(region, cluster, sep="_"), ".csv", sep=""), row.names = F)
}
}
Differential expression analyses for PD vs Control pseudobulks for each region and cluster pair. Save per-cluster results (one excel with region per sheet).
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
gene_dds_list <- list()
gene_exp_list <- list()
gene_res_list <- list()
gene_res_list_df <- list()
regions <- c("SN", "PUT", "AMY", "PFC")
for(cluster in as.character(0:7)){
gene_dds_list[[cluster]] <- list()
gene_exp_list[[cluster]] <- list()
gene_res_list[[cluster]] <- list()
gene_res_list_df[[cluster]] <- list()
for(region in regions){
print(cluster)
print(region)
gene_counts <- list()
gene_counts[[region]] <- fread(paste("~/inbox/ASAP/trusTEr_output_vs2/", region,"/gene_counts_", region,"_",cluster,".csv", sep=""), sep = ",", data.table = F)
rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid
samplesheet_list <- split(samplesheet, f = samplesheet$Region)
rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(gene_counts[[region]]))]
rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
rownames(gene_counts[[region]]) <- gene_counts[[region]]$Geneid
expressed_genes <- rownames(gene_counts[[region]])[which(rowSums(gene_counts[[region]][,samples_cluster]) > 0)]
gene_dds_list[[cluster]][[region]] <- DESeqDataSetFromMatrix((gene_counts[[region]][expressed_genes,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design = ~ Dx)
gene_dds_list[[cluster]][[region]]$Dx <- relevel(gene_dds_list[[cluster]][[region]]$Dx, "Ctl")
gene_dds_list[[cluster]][[region]] <- DESeq(gene_dds_list[[cluster]][[region]])
gene_exp_list[[cluster]][[region]] <- getAverage(gene_dds_list[[cluster]][[region]])
gene_res_list[[cluster]][[region]] <- results(gene_dds_list[[cluster]][[region]], )
gene_res_list_df[[cluster]][[region]] <- as.data.frame(gene_res_list[[cluster]][[region]])
gene_res_list_df[[cluster]][[region]]$gene_id <- rownames(gene_res_list_df[[cluster]][[region]])
print(names(gene_res_list_df[[cluster]]))
}
openxlsx::write.xlsx(gene_res_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gene_cluster_", cluster, "_PD_DEA_snRNAseq_res.xlsx", sep=""))
}
## [1] "0"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## [1] "SN"
## [1] "0"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 786 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "0"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 669 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "0"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 348 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "1"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 926 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "1"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 584 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "1"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 602 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "1"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 212 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "2"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 959 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "2"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1245 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "2"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1249 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "2"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 673 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "3"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 899 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "3"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 884 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "3"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 952 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "3"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 327 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "4"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 617 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "4"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 383 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "4"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 459 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "4"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 287 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "5"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1155 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "5"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 844 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "5"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1469 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "5"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 961 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "6"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 398 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "6"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 171 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "6"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 262 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "6"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 187 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
## [1] "7"
## [1] "SN"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 305 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN"
## [1] "7"
## [1] "PUT"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 109 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT"
## [1] "7"
## [1] "AMY"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 76 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY"
## [1] "7"
## [1] "PFC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 12 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## [1] "SN" "PUT" "AMY" "PFC"
Visualize mean expression vs log2FC for each cluster-region
volcano_plots_list <- list()
mean_plots_list <- list()
for(cluster in as.character(0:7)){
volcano_plots_list[[cluster]] <- list()
mean_plots_list[[cluster]] <- list()
for(region in regions){
tmp <- EnhancedVolcano(gene_res_list[[cluster]][[region]],
drawConnectors = T,
lab = NA,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 1)
tmp$data$Sig <- ifelse(tmp$data$log2FoldChange > 1 & tmp$data$Sig == "FC_P", "Upregulated",
ifelse(tmp$data$log2FoldChange < -1 & tmp$data$Sig == "FC_P", "Downregulated", "Not significant"))
tmp$data$Sig <- paste(tmp$data$Sig, table(tmp$data$Sig)[tmp$data$Sig], sep = ": ")
not_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Not significant")]
up_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Upregulated")]
down_sig_lab <- unique(tmp$data$Sig)[startsWith(unique(tmp$data$Sig), "Downregulated")]
if(length(unique(tmp$data$Sig)) == 1){
point_colours <- c("grey")
}
if(length(unique(tmp$data$Sig)) == 2){
if(any(str_detect(unique(tmp$data$Sig), "Upregulated"))){
point_colours <- c("grey","tomato")
}
}
if(length(unique(tmp$data$Sig)) == 2){
if(any(str_detect(unique(tmp$data$Sig), "Downregulated"))){
point_colours <- c("grey","darkturquoise")
}
}
if(length(unique(tmp$data$Sig)) == 3){
point_colours <- c("grey","darkturquoise", "tomato")
names(point_colours) <- c(not_sig_lab, down_sig_lab, up_sig_lab)
}
point_sizes <- c(1,2,2)
names(point_sizes) <- c(not_sig_lab, down_sig_lab, up_sig_lab)
title <- paste(region, cluster, sep=": cluster ")
tmp <- tmp$data
volcano_plots_list[[cluster]][[region]] <- ggplot(tmp, aes(x=log2FoldChange, y=-log10(padj), colour=Sig, size=Sig)) + geom_point(alpha=0.7) +
geom_vline(xintercept = 1, linetype="dotted") + geom_vline(xintercept = -1, linetype="dotted") +
geom_hline(yintercept = -log10(0.05), linetype="dotted") + labs(colour="") +
scale_color_manual(values = point_colours) +
scale_size_manual(values = point_sizes, guide="none") +
theme_bw() +
theme(text = element_text(size=15), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +
ggtitle(title)
effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
mean_plots_list[[cluster]][[region]] <- meanPlot_cus(exp = gene_exp_list[[cluster]][[region]]$Mean,
test = gene_res_list[[cluster]][[region]],
col1 = ifelse(length(unique(tmp$Sig)) == 3, "tomato",
ifelse(any(str_detect(unique(tmp$Sig), "Downregulated")), "grey", "darkturquoise")),
col2 = ifelse(length(unique(tmp$Sig)) == 3, "darkturquoise",
ifelse(any(str_detect(unique(tmp$Sig), "Downregulated")), "darkturquoise", "grey")),
col3 = ifelse(length(unique(tmp$Sig)) == 3, "grey",
ifelse(any(str_detect(unique(tmp$Sig), "Upregulated")), "tomato", "grey")),
c1 = effect, c2="Ctl", p = 0.05, l=1, ttl = title, repel = F)
}
}
mean_plots_list
## $`0`
## $`0`$SN
## Warning: Removed 6653 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PUT
## Warning: Removed 35564 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$AMY
## Warning: Removed 36930 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PFC
## Warning: Removed 42538 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`1`
## $`1`$SN
## Warning: Removed 23823 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PUT
## Warning: Removed 31791 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$AMY
## Warning: Removed 35127 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PFC
## Warning: Removed 38408 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`2`
## $`2`$SN
## Warning: Removed 33216 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PUT
## Warning: Removed 35867 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$AMY
## Warning: Removed 35539 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PFC
## Warning: Removed 36141 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`3`
## $`3`$SN
## Warning: Removed 30840 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PUT
## Warning: Removed 30380 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$AMY
## Warning: Removed 32243 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PFC
## Warning: Removed 30826 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`4`
## $`4`$SN
## Warning: Removed 27428 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PUT
## Warning: Removed 27085 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$AMY
## Warning: Removed 27022 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PFC
## Warning: Removed 24930 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`5`
## $`5`$SN
## Warning: Removed 37824 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PUT
## Warning: Removed 35104 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$AMY
## Warning: Removed 34018 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PFC
## Warning: Removed 34609 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`6`
## $`6`$SN
## Warning: Removed 19241 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PUT
## Warning: Removed 16485 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$AMY
## Warning: Removed 18119 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PFC
## Warning: Removed 19972 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`7`
## $`7`$SN
## Warning: Removed 16272 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PUT
## Warning: Removed 11434 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$AMY
## Warning: Removed 12453 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PFC
## Warning: Removed 9317 rows containing missing values or values outside the scale range
## (`geom_point()`).
set.seed(10)
gse_dotplot <- function(df){
genelist <- df[which(!is.na(df$log2FoldChange)),c("log2FoldChange", "gene_id"), drop=F]
genelist <- genelist[order(genelist$log2FoldChange, decreasing = T),]
genelist_FC <- genelist$log2FoldChange
names(genelist_FC) <- genelist$gene_id
gse <- gseGO(geneList=genelist_FC,
ont ="ALL",
keyType = "SYMBOL",
minGSSize = 3,
maxGSSize = 800,
seed = T,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "BH")
return(gse)
}
For some reason the seed is not taken in by clusterProfiler, so the results from here onwards will be using the original run I did on this code (same but ever so slightly different numbers, loaded here at the end of the chunk). The clean data provided will be the original run I did.
gse_list <- list()
gse_list_df <- list()
gse_plot_list <- list()
for(cluster in as.character(0:7)){
gse_list[[cluster]] <- list()
gse_list_df[[cluster]] <- list()
gse_plot_list[[cluster]] <- list()
for(region in regions){
print(cluster)
print(region)
gse_list[[cluster]][[region]] <- gse_dotplot(df = gene_res_list_df[[cluster]][[region]])
gse_list_df[[cluster]][[region]] <- gse_list[[cluster]][[region]]@result
if(!is.null(gse_plot_list[[cluster]][[region]])){
# gse_plot_list[[cluster]][[region]] <- gse_pretty_dotplot(gse = gse_list[[cluster]][[region]], topn = 30) + ggtitle(paste(region, cluster, sep=" cluster: "))
}
}
if(length(gse_list_df[[cluster]]) > 0){
openxlsx::write.xlsx(gse_list_df[[cluster]], paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/gse_cluster", cluster, "PD_GSEA_snRNAseq_res.xlsx", sep="_"))
}
}
## [1] "0"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (90.46% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## no term enriched under specific pvalueCutoff...
## [1] "0"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (24.56% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "0"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 100 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "0"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (13.49% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (29.53% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 8 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.24% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "1"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.48% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.53% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.49% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 11 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.45% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "2"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.85% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.33% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (19.44% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.23% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "3"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (18.69% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.06% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.23% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 26 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (22.65% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "4"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (21.79% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "5"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (13.13% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "5"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (14.73% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "5"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (16.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "5"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (15.51% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (28.86% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "6"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (33.31% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "6"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (27.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 2 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## leading edge analysis...
## done...
## [1] "7"
## [1] "SN"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.27% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "7"
## [1] "PUT"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (52.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## leading edge analysis...
## done...
## [1] "7"
## [1] "AMY"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (62.73% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 7 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## [1] "7"
## [1] "PFC"
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (63.48% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 2 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
# save.image("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/src/r_scripts/gene_uniq_DEA.Rdata")
load("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/src/r_scripts/gene_uniq_DEA.Rdata")
This chunk is just to show you that the results of the gene DEA are indeed identical as before
mean_plots_list
## $`0`
## $`0`$SN
## Warning: Removed 6653 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PUT
## Warning: Removed 35564 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$AMY
## Warning: Removed 36930 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`0`$PFC
## Warning: Removed 42538 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`1`
## $`1`$SN
## Warning: Removed 23823 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PUT
## Warning: Removed 31791 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$AMY
## Warning: Removed 35127 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`1`$PFC
## Warning: Removed 38408 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`2`
## $`2`$SN
## Warning: Removed 33216 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PUT
## Warning: Removed 35867 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$AMY
## Warning: Removed 35539 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`2`$PFC
## Warning: Removed 36141 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`3`
## $`3`$SN
## Warning: Removed 30840 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PUT
## Warning: Removed 30380 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$AMY
## Warning: Removed 32243 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`3`$PFC
## Warning: Removed 30826 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`4`
## $`4`$SN
## Warning: Removed 27428 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PUT
## Warning: Removed 27085 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$AMY
## Warning: Removed 27022 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`4`$PFC
## Warning: Removed 24930 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`5`
## $`5`$SN
## Warning: Removed 37824 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PUT
## Warning: Removed 35104 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$AMY
## Warning: Removed 34018 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`5`$PFC
## Warning: Removed 34609 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`6`
## $`6`$SN
## Warning: Removed 19241 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PUT
## Warning: Removed 16485 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$AMY
## Warning: Removed 18119 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`6`$PFC
## Warning: Removed 19972 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
##
## $`7`
## $`7`$SN
## Warning: Removed 16272 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PUT
## Warning: Removed 11434 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$AMY
## Warning: Removed 12453 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## $`7`$PFC
## Warning: Removed 9317 rows containing missing values or values outside the scale range
## (`geom_point()`).
gse_pretty_dotplot <- function(gse, topn = 10, terms = NA){
gse <- gse@result
# gse <- gse[which(gse$ONTOLOGY == "BP"),]
gse$sign <- ifelse(gse$enrichmentScore > 0, "Activated", "Supressed")
gse$num_genes <- sapply(str_split(gse$core_enrichment, "/"), length)
gse$gene_ratio <- gse$num_genes / gse$setSize
gse$Description <- factor(gse$Description, levels = gse[order(gse$gene_ratio), "Description"])
if(!all(is.na(terms))){
gse <- gse %>%
drop_na() %>%
filter(ID %in% terms)
}else{
gse <- gse[order(gse$NES, -log10(gse$p.adjust), decreasing = T),]
}
gse %>%
drop_na() %>%
ggplot(aes(x=Description, y=gene_ratio, size = num_genes, fill = p.adjust)) + geom_point(shape=21, colour="lightgrey") + facet_wrap(.~sign) + coord_flip() + theme_pubr(legend = "right", border = T) + labs(x="", fill="Padj", y = "Gene ratio", size = "Num genes") + scale_fill_gradientn(colors = c("firebrick1", "gray94")) + scale_size_continuous(range = c(2,8)) + theme(axis.text.y = element_text(size=10), strip.text.x = element_text(size = 10),axis.text.x = element_text(size=10),legend.text = element_text(size=10),legend.title = element_text(size=10))
}